library(vegan) library(permute) library(simba) library(cluster) library(ecodist) library(fields) ############################## #PCoA of GO gofac<-read.csv("gofactor.csv",header=T,row.names=1) gofac.gower<-daisy(gofac,metric="gower") gofac.gower gofac.pcoa<-cmdscale(gofac.gower,k=1000,eig=TRUE,add=TRUE) gofac.pcoa #want to know the % variation explained by any principal component? then divide each eigenvalue by the sum of the eigenvalues across all PCs. Below looking at first 5 PCs gofac.pcoa$eig[1:5]/sum(gofac.pcoa$eig)*100 #look at ordination plot of PC and PC2 ordiplot(gofac.pcoa, choices=c(1,2), type="text", display="sites", xlab="PC1", ylab="PC2") #here performs a linear regression between each of the original desriptors (species)and the scores from each PC. A permutation test is used to assess statistical significance vec.gofac<-envfit(gofac.pcoa$points, gofac, perm=1000) #look at values vec.gofac #same plot just w/0 labels ordiplot(gofac.pcoa, choices=c(1,2), xlab='PC1', ylab="PC2") plot(vec.gofac, p.max=0.01, col="blue") ################################################### #PCoA of meth descrip meth<-read.csv("meth1000.csv",header=T,row.names=1) #logtransform meth.log<-log(meth+1) hist.plots(meth.log) meth.euc<-daisy(meth.log,metric="euclidean") meth.pcoa<-cmdscale(meth.euc,k=1000,eig=TRUE,add=TRUE) meth.pcoa #want to know the % variation explained by any principal component? then divide each eigenvalue by the sum of the eigenvalues across all PCs. Below looking at first 5 PCs meth.pcoa$eig[1:5]/sum(meth.pcoa$eig)*100 #look at ordination plot of PC and PC2 ordiplot(meth.pcoa, choices=c(1,2), type="text", display="sites", xlab="PC1 (59.3%)", ylab="PC2 (29.6%)") #here performs a linear regression between each of the original desriptors (species)and the scores from each PC. A permutation test is used to assess statistical significance vec.meth<-envfit(meth.pcoa$points, meth.log, perm=1000) #look at values vec.meth #same plot just w/0 labels ordiplot(meth.pcoa, choices=c(1,2), xlab='PC1 (59.3%)', ylab="PC2 (29.6%)") plot(vec.meth, p.max=0.01, col="blue") #####want to do this again but taking out # of CGs #logtransform meth2.log<-log(meth[,4:9]+1) head(meth2.log) meth.euc2<-daisy(meth2.log,metric="euclidean") meth2.pcoa<-cmdscale(meth.euc2,k=1000,eig=TRUE,add=TRUE) meth2.pcoa #want to know the % variation explained by any principal component? then divide each eigenvalue by the sum of the eigenvalues across all PCs. Below looking at first 5 PCs meth2.pcoa$eig[1:5]/sum(meth2.pcoa$eig)*100 #look at ordination plot of PC and PC2 ordiplot(meth2.pcoa, choices=c(1,2), type="text", display="sites", xlab="PC1 (79.3%)", ylab="PC2 (18.8%)") #here performs a linear regression between each of the original desriptors (species)and the scores from each PC. A permutation test is used to assess statistical significance vec.meth2<-envfit(meth2.pcoa$points, meth2.log, perm=1000) #look at values vec.meth2 #same plot just w/0 labels ordiplot(meth2.pcoa, choices=c(1,2), xlab='PC1 (79.3%)', ylab="PC2 (18.8%)") plot(vec.meth2, p.max=0.01, col="blue") #Try a CAP to compare meth and GO?? go.cap<-capscale(gofac.gower~., meth2.log) summary(go.cap) #interpretation: inertia corresponds to variance. Here 1.3% of the variance is explained by the methylation matrix #can I do it the other way? go.cap<-capscale(meth.euc2~., gofac) summary(go.cap) #interpretation, this only accounted for 2%, 2% of variance in meth could be explained by the GO ######Maybe add in tissue dist tis<-read.csv('tisdist1000.csv',header=T,row.names=1) tis.log<-log(tis+1) hist.plots(tis.log) tis.bray<-vegdist(tis.log,method='bray') tis.pcoa<-cmdscale(tis.bray,k=1000,eig=TRUE,add=TRUE) tis.pcoa #want to know the % variation explained by any principal component? then divide each eigenvalue by the sum of the eigenvalues across all PCs. Below looking at first 5 PCs tis.pcoa$eig[1:5]/sum(tis.pcoa$eig)*100 #look at ordination plot of PC and PC2 ordiplot(meth2.pcoa, choices=c(1,2), type="text", display="sites", xlab="PC1 (16.5%)", ylab="PC2 (7.3%)") #here performs a linear regression between each of the original desriptors (species)and the scores from each PC. A permutation test is used to assess statistical significance vec.tis<-envfit(tis.pcoa$points, tis.log, perm=1000) #look at values vec.tis #same plot just w/0 labels ordiplot(tis.pcoa, choices=c(1,2), xlab='PC1 (16.5%)', ylab="PC2 (7.3%)") plot(vec.tis, p.max=0.01, col="blue") ##transpose it tis.log<-log(tis+1) tis.logt<-t(tis.log) tist.bray<-vegdist(tis.logt,method='bray') tist.pcoa<-cmdscale(tist.bray,k=5,eig=TRUE,add=TRUE) tist.pcoa #want to know the % variation explained by any principal component? then divide each eigenvalue by the sum of the eigenvalues across all PCs. Below looking at first 5 PCs tist.pcoa$eig[1:5]/sum(tist.pcoa$eig)*100 #look at ordination plot of PC and PC2 ordiplot(tist.pcoa, choices=c(1,2), type="text", display="sites", xlab="PC1 (33.8%)", ylab="PC2 (24.9%)") #here performs a linear regression between each of the original desriptors (species)and the scores from each PC. A permutation test is used to assess statistical significance vec.tist<-envfit(tist.pcoa$points, tis.logt, perm=1000) #look at values vec.tist #same plot just w/0 labels ordiplot(tist.pcoa, choices=c(1,2), xlab='PC1 (16.5%)', ylab="PC2 (7.3%)") plot(vec.tist, p.max=0.005, col="blue") #Try a CAP to compare meth and tis?? methfortis<-read.csv('meth1000fortis.csv',header=T,row.names=1) methfortis.log<-log(methfortis+1) methfortis.euc<-daisy(methfortis.log,metric="euclidean") methfortis.cap<-capscale(tis.bray~., methfortis.log) summary(methfortis.cap) #interpretation: inertia corresponds to variance. Here 32.3% of the variance in the tissue distribution is explained by the methylation matrix, CAP1 explains 31.4%, while 1 and 2 explain 32% of the variance ##TO TEST SIGNIFICANCE (Monte Carlo global permutation tests) - several are available #first look at all constraints simultaneously (considering all environmental constraints at once) anova(methfortis.cap) #second we may want to look at each axis in turn - the test based on the first axis has max power against the alternative hypothesis that there is a SINGLE dominating gradient that determines the relation between species and environment. THis is a sequential test, so the second test just partiions out the first axis and tests the residual variance, then the third test partitions out the first and second etc. anova(methfortis.cap, by='axis') #finally we may want to look at each TERM in the model, performs a second signficance test for each term (constraining variable) anova(methfortis.cap,by='terms') #ORDINATION SCORES: there are 4 types of scores to look at (we discussed in class that WA scores are the most useful) #lets use WA scores to visualize the ordination plot(methfortis.cap,choices=c(1,2), type='points', display='wa', scaling=2) #or you can just plot site labels: plot(methfortis.cap,choices=c(1,2), type='n', display='wa', scaling=2) text(methfortis.cap,choices=c(1,2),labels=row.names(methfortis)) #interpreting the constrained ordination #canonical coefficients #these are eigenvector coefficients, "they are not very useful for interpretation" #intra-set scores round(intrasetcor(methfortis.cap),5) #inter-set scores (correlations between species-derived sample scores (WA scores)and environmental variables) round(intersetcor(methfortis.cap), 5) #biplot for explanatory variables #biplot scores for the constraints are given in the summary() of the ordination summary(methfortis.cap) #triplot plot(methfortis.cap,choices=c(1,2),display=c('wa','sp','bp'),scaling=2) ##partial cca? #######nothing too exciting here tis<-read.csv('tisdist1000.csv',header=T,row.names=1) tis.log<-log(tis+1) tis.bray<-vegdist(tis.log,method='bray') #separating methmatrix attach(methfortis.log) meth<-cbind(meth.exon,meth.intron,meth.total) desc<-cbind(num.exon,genelength,meanexonlength) detach(methfortis.log) # tis.pcca1<-cca(tis.log,meth,desc) summary(tis.pcca1) tis.pcca1<-cca(tis.log,desc,meth) summary(tis.pcca1) tis.pcca<-ordi.part(tis.log,meth,desc,method='cca') plot.ordi.part(tis.pcca,which='total') # gofac<-read.csv("gofactor1000fortisbinary.csv",header=T,row.names=1) head(gofac) go.pcca1<-ordi.part(gofac,meth,desc,method='cca') meth.pcca1<-ordi.part(methfortis,desc,gofac,method='cca') #PCOA of everything all<-read.csv("alldataforsinglego.csv",header=T,row.names=1) all.gower<-daisy(all,metric="gower") all.gower all.pcoa<-cmdscale(all.gower,k=808,eig=TRUE,add=TRUE) all.pcoa #want to know the % variation explained by any principal component? then divide each eigenvalue by the sum of the eigenvalues across all PCs. Below looking at first 5 PCs all.pcoa$eig[1:5]/sum(all.pcoa$eig)*100 #look at ordination plot of PC and PC2 ordiplot(all.pcoa, choices=c(1,2), type="text", display="sites", xlab="PC1(6.5%)", ylab="PC2(3.6%)") #here performs a linear regression between each of the original desriptors (species)and the scores from each PC. A permutation test is used to assess statistical significance vec.all<-envfit(all.pcoa$points, all, perm=1000) #look at values vec.all #same plot just w/0 labels ordiplot(all.pcoa, choices=c(1,2), xlab='PC1', ylab="PC2") plot(vec.all, p.max=0.01, col="blue") points(col=('rainbow') #I need to try this again w/0 GO because it looks weird all<-read.csv("alldataforsinglego.csv",header=T,row.names=1) head(all) all2<-all[,2:7] head(all2) all.gower<-daisy(all2,metric="gower") all.gower all.pcoa<-cmdscale(all.gower,k=808,eig=TRUE,add=TRUE) all.pcoa #want to know the % variation explained by any principal component? then divide each eigenvalue by the sum of the eigenvalues across all PCs. Below looking at first 5 PCs all.pcoa$eig[1:5]/sum(all.pcoa$eig)*100 #look at ordination plot of PC and PC2 ordiplot(all.pcoa, choices=c(1,2), type="text", display="sites", xlab="PC1(6.5%)", ylab="PC2(3.6%)") #here performs a linear regression between each of the original desriptors (species)and the scores from each PC. A permutation test is used to assess statistical significance vec.all<-envfit(all.pcoa$points, all2, perm=1000) #look at values vec.all #same plot just w/0 labels ordiplot(all.pcoa, choices=c(1,2), xlab='PC1', ylab="PC2") plot(vec.all, p.max=0.01, col="blue") #I need to try this again w/ GO and w/o meth all<-read.csv("alldataforsinglego_methlevel.csv",header=T,row.names=1) head(all) all2<-all[,2:5] head(all2) all.gower<-daisy(all2,metric="gower") all.gower all.pcoa<-cmdscale(all.gower,k=808,eig=TRUE,add=TRUE) all.pcoa #want to know the % variation explained by any principal component? then divide each eigenvalue by the sum of the eigenvalues across all PCs. Below looking at first 5 PCs all.pcoa$eig[1:5]/sum(all.pcoa$eig)*100 #look at ordination plot of PC and PC2 ordiplot(all.pcoa, choices=c(1,2), type="text", display="sites", xlab="PC1(6.5%)", ylab="PC2(3.6%)") #here performs a linear regression between each of the original desriptors (species)and the scores from each PC. A permutation test is used to assess statistical significance vec.all<-envfit(all.pcoa$points, all2, perm=1000) #look at values vec.all #same plot just w/0 labels ordiplot(all.pcoa, choices=c(1,2), xlab='PC1', ylab="PC2") plot(vec.all, p.max=0.05, col="blue")